library(limma)
library(biomaRt)
library(edgeR)
library(AnnotationHub)
library(tidyverse)
library(magrittr)
library(pander)
library(RColorBrewer)
library(ggrepel)
library(swfdr)
library(qvalue)
library(scales)
library(here)
library(variancePartition)
library(org.Hs.eg.db)
library(plyr)
library(ggraph)
library(tidygraph)
library(fgsea)
library(pheatmap)
library(RUVSeq)
library(GenomicRanges)
library(cqn)
library(kableExtra)
library(goseq)
library(org.Dr.eg.db)
library(pathview)
theme_set(theme_bw())
panderOptions("big.mark", ",")
panderOptions("table.split.table", Inf)
panderOptions("table.style", "rmarkdown")
if (interactive()) setwd(here::here("R"))
# Get GC content and lengths
gcTrans <- url("https://uofabioinformaticshub.github.io/Ensembl_GC/Release96/Danio_rerio.GRCz11.96.rds") %>%
readRDS()
# Convert to the gene-level with average length, average GC, max length, and GC of longest transcript for all transcripts assigned to a single gene.
# gcGene <- gcTrans %>%
# split(f = .$gene_id) %>%
# mclapply(function(x){
# gr <- reduce(x)
# df <- DataFrame(
# gene_id = unique(x$gene_id),
# gene_symbol = unique(x$gene_symbol),
# aveLength = round(mean(x$length),0) %>% as.integer(),
# aveGc = sum(x$length * x$gc) / sum(x$length),
# maxLength = max(x$length),
# longestGc = x$gc[which.max(x$length)[[1]]]
# )
# mcols(gr) <- df
# gr
# }, mc.cores = 4) %>%
# GRangesList() %>%
# unlist() %>%
# na.omit()
# x# This takes a reasonable amount of time
# # Save as .rds file for faster loading
# saveRDS(gcGene, file = "../R/gcGene.rds")
gcGene <- readRDS("../R/gcGene.rds")
Annotation was set up as a EnsDb object based on Ensembl Release 96
ah <- AnnotationHub() %>%
subset(species == "Danio rerio") %>%
subset(dataprovider == "Ensembl") %>%
subset(rdataclass == "EnsDb")
ensDb <- ah[["AH69169"]]
genes <- genes(ensDb)
transGR <- transcripts(ensDb)
mcols(genes) <- mcols(genes)[c("gene_id", "gene_name",
"gene_biotype", "entrezid","description")] %>%
as.data.frame() %>%
dplyr::select(-entrezid) %>%
left_join(as.data.frame(mcols(gcGene))) %>%
distinct(gene_id, .keep_all = TRUE) %>%
set_rownames(.$gene_id) %>%
DataFrame() %>%
.[names(genes),]
genesGR <- genes(ensDb)
DrEns2Symbol <- genesGR %>%
mcols() %>%
as_tibble() %>%
dplyr::select(gene_id, gene_name)
Prior to count-level analysis, the initial dataset was pre-processed using the following steps:
After trimming alignment was performed using STAR v2.5.3a to the Danio rerio genome included in Ensembl Release 96 (GRCz11). Aligned reads were counted for each gene if the following criteria were satisfied:
minSamples <- 6
minCpm <- 1
counts <- file.path("../2_alignedData/featureCounts/genes.out") %>%
read_tsv() %>%
set_colnames(basename(colnames(.))) %>%
set_colnames(str_remove(colnames(.), "Aligned.+")) %>%
gather(key = "Library", value = "Counts", -Geneid) %>%
dplyr::mutate(Sample = str_remove_all(Library, "_2")) %>%
group_by(Geneid, Sample) %>%
dplyr::summarise(Counts = sum(Counts)) %>%
tidyr::spread(key = "Sample", value = "Counts") %>%
as.data.frame() %>%
column_to_rownames("Geneid")
genes2keep <- cpm(counts) %>%
is_greater_than(minCpm) %>%
rowSums() %>%
is_weakly_greater_than(minSamples)
Counts were then loaded and summed across replicate sequencing runs. Genes were only retained if receiving more than 1 cpm in at least 6 samples. This filtering step discarded 12,661 genes as undetectable and retained 19,396 genes for further analysis.
plotDensities(cpm(counts, log = TRUE), legend = FALSE, main = "a) All genes")
plotDensities(cpm(counts[genes2keep,], log = TRUE), legend = FALSE, main = "b) Retained genes")
Distribution of logCPM values for a) all and b) retained genes
dgeList <- counts %>%
.[genes2keep,] %>%
DGEList(
samples = tibble(
sample = colnames(.),
group = str_replace_all(sample, "[0-9]*[A-Z]([12])Lardelli", "\\1"),
pair = str_replace_all(sample, "[0-9]*([A-Z])[0-9].+", "\\1")
) %>%
as.data.frame(),
genes = genes[rownames(.)] %>%
as.data.frame() %>%
dplyr::select(
ensembl_gene_id = gene_id,
chromosome_name = seqnames,
description,
external_gene_name = gene_name,
aveLength,
aveGc,
maxLength,
longestGc
)
) %>%
calcNormFactors(method = "TMM") %>%
estimateDisp()
## Design matrix not provided. Switch to the classic mode.
dgeList$samples$Genotype <- c("Q96K97del/+", "WT")[dgeList$samples$group] %>%
factor(levels = c("WT", "Q96K97del/+"))
Counts were then formed into a DGEList object. Library sizes after alignment, counting and gene filtering ranged between 44,498,373 and 52,779,608 reads.
v <- dgeList %>%
voomWithQualityWeights(design = matrix(1, nrow = ncol(.)))
Sample weights with the ideal equal weight of 1 shown as a horizontal line.
pca <- v$E %>%
t() %>%
prcomp()
summary(pca)$importance %>% pander(split.tables = Inf)
| Â | PC1 | PC2 | PC3 | PC4 | PC5 | PC6 | PC7 | PC8 | PC9 | PC10 | PC11 | PC12 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Standard deviation | 21.98 | 17.89 | 14.06 | 12.92 | 11.64 | 11.14 | 10.79 | 10.61 | 9.846 | 7.989 | 7.633 | 5.872e-14 |
| Proportion of Variance | 0.2576 | 0.1706 | 0.1055 | 0.08908 | 0.07222 | 0.06621 | 0.06209 | 0.05998 | 0.05169 | 0.03403 | 0.03107 | 0 |
| Cumulative Proportion | 0.2576 | 0.4282 | 0.5336 | 0.6227 | 0.6949 | 0.7611 | 0.8232 | 0.8832 | 0.9349 | 0.9689 | 1 | 1 |
PCA of all samples. Point sizes indicate library sizes with the combination of PC1 & PC2 appearing to capture a considerable amount of this variation.
###DGE Analysis
To run a mixed-effects (nested) analysis we must use a voom object. Here we are looking at the change due to genotype within each pair. We then look at the mean of the differences, instead of the difference in means.
voomData <- voom(dgeList, plot = FALSE)
fm <- ~ Genotype + (1|pair)
contrMat <- getContrast(voomData, fm, voomData$targets, "GenotypeQ96K97del/+")
library(doParallel)
cl <- makeCluster(7)
registerDoParallel(cl)
fit <- dream(voomData, fm, voomData$targets, contrMat) %>% eBayes()
## Projected memory usage: > 506.5 Mb
##
## Finished...
## Total: 293 s
# stop cluster
stopCluster(cl)
deGenes <- topTable(fit, n = Inf) %>%
as_tibble() %>%
mutate(
q = qvalue(P.Value)$qvalues,
Sig = q < 0.05) %>%
dplyr::select(
ensembl_gene_id, external_gene_name, logFC, t,
P.Value, Sig, q, aveGc, aveLength, maxLength, longestGc,AveExpr)
head(deGenes)
## # A tibble: 6 x 12
## ensembl_gene_id external_gene_n… logFC t P.Value Sig q aveGc
## <chr> <chr> <dbl> <dbl> <dbl> <lgl> <dbl> <dbl>
## 1 ENSDARG0000009… CABZ01034698.2 1.54 7.75 4.15e-6 TRUE 0.0215 0.359
## 2 ENSDARG0000009… si:ch211-213a13… 1.02 7.12 1.25e-6 TRUE 0.0155 0.399
## 3 ENSDARG0000007… leng8 0.324 7.38 6.94e-6 TRUE 0.0215 0.431
## 4 ENSDARG0000007… tex11 1.56 6.28 5.75e-6 TRUE 0.0215 0.456
## 5 ENSDARG0000009… si:dkey-9i23.15 -0.641 -6.00 9.84e-6 TRUE 0.0225 0.413
## 6 ENSDARG0000003… fuom -0.924 -5.95 1.09e-5 TRUE 0.0225 0.342
## # … with 4 more variables: aveLength <int>, maxLength <int>,
## # longestGc <dbl>, AveExpr <dbl>
deGenesSig <- deGenes %>%
dplyr::filter(Sig == TRUE)
Volcano plot highlighting DE genes. Genes indicated in red belowd were considered as DE, which receive a q-value < 0.05.
logFC plotted against expression level with significant DE genes shown in red.
###RUV treatment (Remove Unwanted Variation) # Find some ‘unchanged’ genes. Grab the lowest ranked 5000
genes4Control <- deGenes %>%
arrange(desc(P.Value)) %>%
dplyr::slice(1:5000) %>%
.[["ensembl_gene_id"]]
length(genes4Control)
## [1] 5000
k <- 1
ruv <- RUVg(
dgeList$counts,
cIdx = genes4Control,
k = k,
isLog = FALSE,
round = TRUE
)
dgeList$samples <- cbind(dgeList$samples, ruv$W)
pcaRUV <- ruv$normalizedCounts %>%
cpm(log = TRUE) %>%
t() %>%
prcomp()
summary(pcaRUV)$importance %>% pander(split.tables = Inf)
| Â | PC1 | PC2 | PC3 | PC4 | PC5 | PC6 | PC7 | PC8 | PC9 | PC10 | PC11 | PC12 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Standard deviation | 17.5 | 13.57 | 12.59 | 11.22 | 10.81 | 10.43 | 10.21 | 9.442 | 7.626 | 7.252 | 0.3357 | 4.834e-14 |
| Proportion of Variance | 0.2347 | 0.1411 | 0.1215 | 0.09653 | 0.0896 | 0.08332 | 0.07989 | 0.06831 | 0.04456 | 0.0403 | 9e-05 | 0 |
| Cumulative Proportion | 0.2347 | 0.3759 | 0.4974 | 0.5939 | 0.6835 | 0.7669 | 0.8467 | 0.9151 | 0.9596 | 0.9999 | 1 | 1 |
pcaRUV$x %>%
cbind(dgeList$samples[rownames(.),]) %>%
ggplot(aes(PC1, PC2, colour = Genotype)) +
geom_point() +
stat_ellipse(aes(fill = Genotype), geom = "polygon", alpha = 0.2) +
xlab(
paste0(
"PC1 (", percent(summary(pcaRUV)$importance[2, "PC1"]), ")"
)
) +
ylab(
paste0(
"PC2 (", percent(summary(pcaRUV)$importance[2, "PC2"]), ")"
)
)
expDesign <- model.matrix(~0 + pair + Genotype + W_1, dgeList$samples) %>%
set_colnames(str_replace(colnames(.), pattern = "Genotype.+", "Mutant"))
dgeList %<>%
estimateGLMCommonDisp(expDesign) %>%
estimateGLMTagwiseDisp(expDesign)
topTable <- dgeList %>%
glmFit(expDesign) %>%
glmLRT(coef = "Mutant") %>%
topTags(n = Inf) %>%
as.data.frame() %>%
set_colnames(gsub("ID.", "", colnames(.))) %>%
as_tibble() %>%
mutate(
DE = FDR < 0.01,
rankstat = -sign(logFC)*log10(PValue)
) %>%
dplyr::select(
ensembl_gene_id, external_gene_name, logFC, logCPM, LR,
PValue, FDR, DE,aveGc, aveLength, rankstat
) %>%
arrange(PValue)
topTableDE <- topTable %>%
dplyr::filter(DE == TRUE)
write.csv(topTableDE,"../R/DEgenes.csv")
tested <- c("si:ch211-213a13.2", "si:ch211-11p18.6", "mdh1ab", "CABZ01034698.2")
# topTable %>%
topTable %>%
ggplot(aes(logCPM, logFC)) +
geom_point(aes(colour = DE), alpha = 0.5) +
geom_smooth(se = FALSE) +
geom_hline(yintercept = c(-1, 1)*0.5, linetype = 2) +
geom_text_repel(
aes(label = external_gene_name, colour = DE),
data = . %>%
dplyr::filter(abs(logFC) > 1.2 & DE | external_gene_name %in% tested),
show.legend = FALSE
) +
scale_colour_manual(values = c("grey50", "red"))
topTable %>%
mutate(DE = FDR < 0.01) %>%
ggplot(aes(logFC, -log10(PValue))) +
geom_point(aes(colour = DE), alpha = 0.5) +
geom_vline(xintercept = c(-1, 1)*0.5, linetype = 2) +
geom_text_repel(
aes(label = external_gene_name, colour = DE),
data = . %>%
dplyr::filter(abs(logFC) > 1.2 & DE | external_gene_name %in% tested),
show.legend = FALSE
) +
scale_colour_manual(values = c("grey50", "red"))
topTable %>%
dplyr::select(ensembl_gene_id, RUV = logFC, DE) %>%
left_join(deGenes) %>%
dplyr::rename(Voom = logFC) %>%
mutate(
Status = case_when(
Sig & DE ~ "Both",
Sig & !DE ~ "Voom Only",
!Sig & DE ~ "RUV Only",
!Sig & !DE ~ "Not DE"
)
) %>%
ggplot(aes(Voom, RUV,)) +
geom_point(aes(colour = Status), alpha = 0.5) +
geom_abline(slope= 1, linetype = 2) +
geom_vline(xintercept = c(-1, 1)*0.5) +
geom_hline(yintercept = c(-1, 1)*0.5) +
scale_colour_manual(values = c("green", "grey50", "red", "blue"))
The 231 genes considered as DE using an FDR of 0.01 and logFC beyond the range \(\pm 0.5\) were then inspected to confirm that the counts support their inclusion as DE.
Here plot CPM for DE genes having a FDR below 0.01 and logFC beyond the range \(\pm 1.2\)
Expression values for each of the potential DE genes using raw CPM values and a log-scale y-axis.
#GC content before RUV
deGenes %>%
dplyr::arrange(desc(P.Value)) %>%
dplyr::filter(!is.na(aveGc)) %>%
ggplot(aes(aveGc, logFC)) +
geom_point(aes(colour = Sig), alpha = 0.5, show.legend = FALSE) +
geom_smooth(se = FALSE) +
labs(
x = "GC Content"
) +
scale_color_manual(values = c("grey70", "red"))
deGenes %>%
dplyr::arrange(desc(P.Value)) %>%
dplyr::filter(!is.na(aveGc)) %>%
ggplot(aes(aveGc, t)) +
geom_point(aes(colour = Sig), alpha = 0.5, show.legend = FALSE) +
geom_smooth(se = FALSE) +
labs(
x = "GC Content",
y = "Ranking Statistic"
) +
scale_color_manual(values = c("grey70", "red"))
#Gene length before RUV
deGenes %>%
dplyr::arrange(desc(P.Value)) %>%
dplyr::filter(!is.na(aveGc)) %>%
ggplot(aes(aveLength, logFC)) +
geom_point(aes(colour = Sig), alpha = 0.5, show.legend = FALSE) +
geom_smooth(se = FALSE) +
labs(
x = "Gene length",
title = "without cqn"
) +
scale_color_manual(values = c("grey70", "red")) +
scale_x_log10(labels = comma)
deGenes %>%
dplyr::arrange(desc(P.Value)) %>%
dplyr::filter(!is.na(aveGc)) %>%
ggplot(aes(aveLength, t)) +
geom_point(aes(colour = Sig), alpha = 0.5, show.legend = FALSE) +
geom_smooth(se = FALSE) +
labs(
x = "Gene Length",
y = "Ranking Statistic",
title = "without cqn"
) +
scale_color_manual(values = c("grey70", "red")) +
scale_x_log10(labels = comma)
#GC content after RUV
topTable %>%
dplyr::arrange(desc(PValue)) %>%
dplyr::filter(!is.na(aveGc)) %>%
ggplot(aes(aveGc, logFC)) +
geom_point(aes(colour = DE), alpha = 0.5, show.legend = FALSE) +
geom_smooth(se = FALSE) +
labs(
x = "GC Content"
) +
scale_color_manual(values = c("grey70", "red"))
topTable %>%
dplyr::arrange(desc(PValue)) %>%
dplyr::filter(!is.na(aveGc)) %>%
ggplot(aes(aveGc, rankstat)) +
geom_point(aes(colour = DE), alpha = 0.5, show.legend = FALSE) +
ylim(-10, 10) +
geom_smooth(se = FALSE) +
labs(
x = "GC Content",
y = "Ranking Statistic"
) +
scale_color_manual(values = c("grey70", "red"))
#Gene length after RUV
topTable %>%
dplyr::arrange(desc(PValue)) %>%
dplyr::filter(!is.na(aveGc)) %>%
ggplot(aes(aveLength, logFC)) +
geom_point(aes(colour = DE), alpha = 0.5, show.legend = FALSE) +
geom_smooth(se = FALSE) +
labs(
x = "Gene length",
title = "without cqn"
) +
scale_color_manual(values = c("grey70", "red")) +
scale_x_log10(labels = comma)
topTable %>%
dplyr::arrange(desc(PValue)) %>%
dplyr::filter(!is.na(aveGc)) %>%
ggplot(aes(aveLength, rankstat)) +
geom_point(aes(colour = DE), alpha = 0.5, show.legend = FALSE) +
ylim(-10, 10) +
geom_smooth(se = FALSE) +
labs(
x = "Gene Length",
y = "Ranking Statistic",
title = "without cqn"
) +
scale_color_manual(values = c("grey70", "red")) +
scale_x_log10(labels = comma)
###GO(Gene Ontology) analysis
The first step here is to map the Ensembl zebrafish gene IDs to Human Entrez gene IDs.
goSummaries <- url("https://uofabioinformaticshub.github.io/summaries2GO/data/goSummaries.RDS") %>%
readRDS()
# Load id conversion file
idConvert <- read_csv2("../files/zf2human_withEntrezIDs.csv") %>%
dplyr::select(Geneid = zfID, EntrezID = Entrez) %>%
mutate(EntrezID = as.character(EntrezID))
# Create function to convert ids, but only keep the IDs in the voom object
convertHsEG2Dr <- function(ids, df = idConvert){
dplyr::filter(df, EntrezID %in% ids)$Geneid
}
# Conversion of zebrafish ensembl ID to zebrafish symbol, for plotting on network analyses
idConvertSymbol <- read_csv2("../files/zf2human_withEntrezIDs.csv") %>%
dplyr::select(label = zfID, symbol = zfName) %>%
na.omit() %>%
unique()
# Import hallmark human gene genesets and tidy gene set names
# .gmt files downloaded from:
# http://software.broadinstitute.org/gsea/downloads.jsp
# http://data.wikipathways.org/20190610/
hallmark <- gmtPathways("../files/h.all.v6.2.entrez.gmt") %>%
mclapply(convertHsEG2Dr, mc.cores = 4) %>%
set_names(str_remove_all(names(.), "HALLMARK_"))
kegg <- gmtPathways("../files/c2.cp.kegg.v6.2.entrez.gmt") %>%
mclapply(convertHsEG2Dr, mc.cores = 4) %>%
set_names(str_remove_all(names(.), "KEGG_"))
wiki <- gmtPathways("../files/wikipathways-20190610-gmt-Homo_sapiens.gmt") %>%
mclapply(convertHsEG2Dr, mc.cores = 4) %>%
set_names(str_remove_all(names(.), "%.+"))
###GO seq analysis
pwf <- topTable %>%
dplyr::select(ensembl_gene_id, logFC, DE, aveLength, aveGc) %>%
mutate(nGC = aveLength*aveGc) %>%
distinct(ensembl_gene_id, .keep_all = TRUE) %>%
with(
nullp(
DEgenes = structure(
as.integer(DE), names = ensembl_gene_id
),
genome = "danRer10",
id = "ensGene",
# bias.data = log(nGC),
bias.data = aveLength,
plot.fit = FALSE
)
)
Probability weight function for a gene being considered as DE based on the number of GC bases per gene.
Mappings from gene to pathway are also required, instead of the previous pathway to gene.
hallmarkByGene <- names(hallmark) %>%
lapply(function(x){
tibble(pathway = x, ensembl_gene_id = hallmark[[x]])
}) %>%
bind_rows() %>%
dplyr::filter(!is.na(ensembl_gene_id)) %>%
split(f = .$ensembl_gene_id) %>%
lapply(extract2, "pathway")
hallmarkGoseq <- pwf %>%
goseq(gene2cat = hallmarkByGene) %>%
as_tibble %>%
mutate(FDR = p.adjust(over_represented_pvalue, method = "fdr"))
| Pathway | numDEInCat | numInCat | PValue | FDR |
|---|---|---|---|---|
| G2M_CHECKPOINT | 15 | 212 | 8.438e-08 | 4.219e-06 |
| E2F_TARGETS | 10 | 185 | 0.0001866 | 0.004666 |
| ESTROGEN_RESPONSE_LATE | 8 | 226 | 0.01228 | 0.2046 |
| BILE_ACID_METABOLISM | 5 | 118 | 0.02236 | 0.2795 |
| CHOLESTEROL_HOMEOSTASIS | 4 | 86 | 0.03098 | 0.2928 |
The same approach was then applied to the set of KEGG gene sets
keggByGene <- names(kegg) %>%
lapply(function(x){
tibble(pathway = x, ensembl_gene_id = kegg[[x]])
}) %>%
bind_rows() %>%
split(f = .$ensembl_gene_id) %>%
lapply(extract2, "pathway")
keggGoseq <- pwf %>%
goseq(gene2cat = keggByGene) %>%
as_tibble %>%
mutate(FDR = p.adjust(over_represented_pvalue, method = "fdr"))
| Pathway | numDEInCat | numInCat | PValue | FDR |
|---|---|---|---|---|
| DNA_REPLICATION | 7 | 30 | 9.749e-09 | 1.813e-06 |
| CELL_CYCLE | 8 | 116 | 1.018e-05 | 0.0009467 |
| FATTY_ACID_METABOLISM | 3 | 46 | 0.009046 | 0.4674 |
| RETINOL_METABOLISM | 4 | 90 | 0.01005 | 0.4674 |
| BIOSYNTHESIS_OF_UNSATURATED_FATTY_ACIDS | 2 | 21 | 0.01668 | 0.5518 |
vapply(keggByGene, function(x){"DNA_REPLICATION" %in% x}, logical(1)) %>% which() %>% names() %>% intersect(dplyr::filter(topTable, DE)$ensembl_gene_id)
## [1] "ENSDARG00000019507" "ENSDARG00000024204" "ENSDARG00000040041"
## [4] "ENSDARG00000057683" "ENSDARG00000058533" "ENSDARG00000101180"
## [7] "ENSDARG00000102798"
# mcm5 mcm3 mcm4 mcm6 pole mcm2 mcm7
vapply(keggByGene, function(x){"CELL_CYCLE" %in% x}, logical(1)) %>% which() %>% names() %>% intersect(dplyr::filter(topTable, DE)$ensembl_gene_id)
## [1] "ENSDARG00000011094" "ENSDARG00000019507" "ENSDARG00000024204"
## [4] "ENSDARG00000040041" "ENSDARG00000057683" "ENSDARG00000077029"
## [7] "ENSDARG00000101180" "ENSDARG00000102798"
vapply(hallmarkByGene, function(x){"G2M_CHECKPOINT" %in% x}, logical(1)) %>% which() %>% names() %>% intersect(dplyr::filter(topTable, DE)$ensembl_gene_id)
## [1] "ENSDARG00000002403" "ENSDARG00000007377" "ENSDARG00000010948"
## [4] "ENSDARG00000011094" "ENSDARG00000019507" "ENSDARG00000024204"
## [7] "ENSDARG00000024488" "ENSDARG00000055753" "ENSDARG00000057683"
## [10] "ENSDARG00000058533" "ENSDARG00000062152" "ENSDARG00000070239"
## [13] "ENSDARG00000077029" "ENSDARG00000102798" "ENSDARG00000114670"
wikiByGene <- names(wiki) %>%
lapply(function(x){
tibble(pathway = x, ensembl_gene_id = wiki[[x]])
}) %>%
bind_rows() %>%
split(f = .$ensembl_gene_id) %>%
lapply(extract2, "pathway")
wikiGoseq <- pwf %>%
goseq(gene2cat = wikiByGene) %>%
as_tibble %>%
mutate(FDR = p.adjust(over_represented_pvalue, method = "fdr"))
| Pathway | numDEInCat | numInCat | PValue | FDR |
|---|---|---|---|---|
| DNA Replication | 7 | 35 | 9.925e-08 | 5.201e-05 |
| Retinoblastoma Gene in Cancer | 9 | 82 | 2.47e-07 | 6.471e-05 |
| G1 to S cell cycle control | 7 | 60 | 4.021e-06 | 0.0007024 |
| Cell Cycle | 8 | 112 | 2.947e-05 | 0.003861 |
| Ciliary landscape | 9 | 210 | 0.0004896 | 0.05131 |
goByGene <- links(org.Dr.egGO2ALLEGS) %>%
as_tibble() %>%
left_join(
links(org.Dr.egENSEMBL2EG)
) %>%
distinct(ensembl_id, go_id) %>%
dplyr::filter(ensembl_id %in% topTable$ensembl_gene_id) %>%
split(f = .$ensembl_id) %>%
lapply(extract2, "go_id")
goSummaries %<>%
dplyr::rename(category =id)
goGoseq <- pwf %>%
goseq(gene2cat = goByGene) %>%
left_join(goSummaries) %>%
as_tibble %>%
mutate(FDR = p.adjust(over_represented_pvalue, method = "fdr"))
goGoseq %>%
dplyr::filter(FDR < 0.05, shortest_path > 4 | terminal_node == TRUE) %>%
dplyr::select(
Pathway = category,
starts_with("num"),
term,
shortest_path,
longest_path,
ontology,
PValue = over_represented_pvalue,
FDR
) %>%
pander(
caption = "Most highly ranked GO pathways.",
justify = "lrrrrrrrr"
)
| Pathway | numDEInCat | numInCat | term | shortest_path | longest_path | ontology | PValue | FDR |
|---|---|---|---|---|---|---|---|---|
| GO:0036388 | 6 | 7 | pre-replicative complex assembly | 6 | 9 | BP | 1.302e-11 | 4.329e-08 |
| GO:0071103 | 12 | 74 | DNA conformation change | 5 | 5 | BP | 2.921e-11 | 7.769e-08 |
| GO:0000727 | 6 | 9 | double-strand break repair via break-induced replication | 8 | 10 | BP | 1.424e-10 | 3.156e-07 |
| GO:0006260 | 12 | 100 | DNA replication | 5 | 7 | BP | 1.347e-09 | 2.239e-06 |
| GO:0003688 | 6 | 14 | DNA replication origin binding | 7 | 7 | MF | 4.753e-09 | 5.747e-06 |
| GO:0006270 | 6 | 17 | DNA replication initiation | 5 | 9 | BP | 1.909e-08 | 1.953e-05 |
| GO:0032508 | 6 | 17 | DNA duplex unwinding | 7 | 7 | BP | 2.102e-08 | 1.996e-05 |
| GO:0032392 | 6 | 18 | DNA geometric change | 6 | 6 | BP | 3.076e-08 | 2.727e-05 |
| GO:0006261 | 9 | 64 | DNA-dependent DNA replication | 6 | 8 | BP | 3.929e-08 | 3.266e-05 |
| GO:0017116 | 4 | 5 | single-stranded DNA-dependent ATP-dependent DNA helicase activity | 5 | 11 | MF | 7.479e-08 | 5.526e-05 |
| GO:0008094 | 8 | 57 | DNA-dependent ATPase activity | 9 | 9 | MF | 2.065e-07 | 0.0001377 |
| GO:0006310 | 10 | 101 | DNA recombination | 5 | 7 | BP | 2.263e-07 | 0.0001433 |
| GO:0065004 | 8 | 62 | protein-DNA complex assembly | 5 | 6 | BP | 4.786e-07 | 0.0002652 |
| GO:0043142 | 4 | 8 | single-stranded DNA-dependent ATPase activity | 10 | 10 | MF | 1.018e-06 | 0.0005415 |
| GO:0006268 | 4 | 9 | DNA unwinding involved in DNA replication | 7 | 9 | BP | 1.699e-06 | 0.0008069 |
| GO:0006281 | 13 | 232 | DNA repair | 5 | 7 | BP | 2.258e-06 | 0.0009972 |
| GO:0000724 | 7 | 54 | double-strand break repair via homologous recombination | 7 | 9 | BP | 2.399e-06 | 0.0009972 |
| GO:0000725 | 7 | 54 | recombinational repair | 6 | 8 | BP | 2.399e-06 | 0.0009972 |
| GO:0004386 | 8 | 96 | helicase activity | 7 | 7 | MF | 1.055e-05 | 0.003421 |
| GO:0006302 | 8 | 95 | double-strand break repair | 6 | 8 | BP | 1.167e-05 | 0.003609 |
| GO:0006271 | 4 | 14 | DNA strand elongation involved in DNA replication | 6 | 9 | BP | 1.524e-05 | 0.004607 |
| GO:0003697 | 6 | 47 | single-stranded DNA binding | 5 | 5 | MF | 1.584e-05 | 0.00468 |
| GO:0006323 | 6 | 50 | DNA packaging | 6 | 6 | BP | 1.831e-05 | 0.005294 |
| GO:0022616 | 4 | 15 | DNA strand elongation | 5 | 7 | BP | 2.027e-05 | 0.005735 |
| GO:0005212 | 6 | 45 | structural constituent of eye lens | 2 | 2 | MF | 2.2e-05 | 0.006097 |
| GO:0006826 | 4 | 21 | iron ion transport | 8 | 8 | BP | 8.631e-05 | 0.02166 |
| GO:1990518 | 2 | 2 | single-stranded DNA-dependent ATP-dependent 3’-5’ DNA helicase activity | 6 | 12 | MF | 0.0001391 | 0.03425 |
goGoseqTop <- goGoseq %>%
dplyr::filter(FDR < 0.05, shortest_path > 4 | terminal_node == TRUE) %>%
dplyr::select(
Pathway = category,
starts_with("num"),
term,
shortest_path,
longest_path,
ontology,
PValue = over_represented_pvalue,
FDR)
write.csv(goGoseqTop, "../R/GoTerms.csv")
vapply(goByGene, function(x){"GO:0005198" %in% x}, logical(1)) %>% which() %>% names() %>% intersect(dplyr::filter(topTable, DE)$ensembl_gene_id)
## [1] "ENSDARG00000011998" "ENSDARG00000014803" "ENSDARG00000024847"
## [4] "ENSDARG00000026726" "ENSDARG00000030215" "ENSDARG00000030723"
## [7] "ENSDARG00000036832" "ENSDARG00000037402" "ENSDARG00000037845"
## [10] "ENSDARG00000041295" "ENSDARG00000042708" "ENSDARG00000044976"
## [13] "ENSDARG00000052000" "ENSDARG00000053502" "ENSDARG00000054753"
## [16] "ENSDARG00000058799" "ENSDARG00000069093" "ENSDARG00000069817"
## [19] "ENSDARG00000073699" "ENSDARG00000074531" "ENSDARG00000077403"
## [22] "ENSDARG00000094466" "ENSDARG00000095147" "ENSDARG00000098591"
## [25] "ENSDARG00000115701"
vapply(goByGene, function(x){"GO:0007219" %in% x}, logical(1)) %>% which() %>% names() %>% intersect(dplyr::filter(topTable, DE)$ensembl_gene_id)
## [1] "ENSDARG00000004232" "ENSDARG00000026629"
vapply(goByGene, function(x){"GO:0006826" %in% x}, logical(1)) %>% which() %>% names() %>% intersect(dplyr::filter(topTable, DE)$ensembl_gene_id)
## [1] "ENSDARG00000016771" "ENSDARG00000077360" "ENSDARG00000077372"
## [4] "ENSDARG00000094210"
## Get significant GO terms
sigGo <- goGoseq %>%
dplyr::filter(FDR < 0.05) %>%
.$category
## Convert list of GO terms by gene to list of genes by GO term
geneByGo <- names(goByGene) %>%
lapply(function(x){tibble(gene_id = x, go_id = goByGene[[x]])}) %>%
bind_rows() %>%
split(f = .$go_id) %>%
lapply(magrittr::extract2, "gene_id")
## Get DE genes that belong to sigificant GO terms
goGenes <- lapply(
sigGo,
function(x){
geneByGo[[x]][geneByGo[[x]] %in% topTableDE$ensembl_gene_id]
}
)
names(goGenes) <- sigGo
## Make tibble of GO terms
goTerms <- names(goGenes) %>%
tibble::enframe(name = NULL, value = "label")
## Make tibble of genes
genes <- unlist(goGenes) %>%
unique() %>%
tibble::enframe(name = NULL, value = "label") %>%
mutate
## Join to create node list
nodes <- rbind(goTerms, genes) %>%
rowid_to_column("id")
## Create edge list
edges <- goGenes %>%
stack() %>%
as_tibble() %>%
dplyr::select(goTerm = ind, geneId = values) %>%
dplyr::arrange(goTerm) %>%
mutate(goTerm = as.character(goTerm)) %>%
left_join(nodes, by = c("goTerm" = "label")) %>%
dplyr::rename(from = id) %>%
left_join(nodes, by = c("geneId" = "label")) %>%
dplyr::rename(to = id) %>%
dplyr::select(from, to)
## Setup colours
colours <- length(sigGo) %>%
rainbow()
## Create tidygraph object
tidy <- tbl_graph(
nodes = nodes,
edges = edges,
directed = FALSE
) %>%
activate(nodes) %>%
mutate(
goTerms = case_when(
id <= length(sigGo) ~ label
),
term = Term(label),
gene_id = case_when(
!label %in% sigGo ~ label
),
colour = case_when(
id <= length(sigGo) ~ colours[id]
),
size = ifelse(id <= length(sigGo), 4, 1)
) %>%
left_join(DrEns2Symbol) %>%
activate(edges) %>%
mutate(
colour = case_when(
from <= length(sigGo) ~ colours[from]
)
)
## Set seed to allow same graph to be produced each time function is executed
set.seed(1234)
## Plot network graph
tested2 <- c("mcm2", "mcm3", "mcm4", "mcm5", "mcm6", "mcm7", "tfa", "tfr1b", "fthl31", "fthl30")
ggraph(tidy, layout = "fr") +
scale_fill_manual(
values = "white",
na.value = "gray80"
) +
scale_edge_color_manual(
values = "black",
na.value = "gray80"
) +
geom_edge_arc(
aes(color = "black"),
alpha = 0.5,
show.legend = FALSE,
curvature = 0.5
) +
geom_node_point(
aes(fill = "black", size = size),
shape = 21,
stroke = 0.5,
show.legend = FALSE
) +
geom_node_label(
aes(label = goTerms),
repel = TRUE,
size = 3,
alpha = 0.7,
label.padding = 0.1
) +
theme_graph() +
theme(legend.position = "none")
###Gene Set Enrichment analysis (GSEA)
ranks <- topTable %>%
mutate(stat = -sign(logFC) * log10(PValue)) %>%
dplyr::arrange(stat) %>%
with(structure(stat, names = ensembl_gene_id))
set.seed(22)
# Run GSEA for hallmark
fgseaHallmark <- fgsea(hallmark, ranks, nperm=1e5) %>%
as_tibble() %>%
dplyr::rename(FDR = padj) %>%
mutate(padj = p.adjust(pval, "bonferroni")) %>%
dplyr::arrange(pval)
fgseaHallmarkTop <- fgseaHallmark %>%
dplyr::filter(padj < 0.05)
fgseaHallmarkTop %>%
dplyr::select(-leadingEdge, -nMoreExtreme) %>%
pander(
style = "rmarkdown",
split.tables = Inf,
justify = "lrrrrrr",
caption = paste(
"The", nrow(.), "most significantly enriched Hallmark pathways.",
"This corresponds to an FDR of", percent(max(.$FDR)))
)
| pathway | pval | FDR | ES | NES | size | padj |
|---|---|---|---|---|---|---|
| MYOGENESIS | 1.169e-05 | 7.728e-05 | -0.5068 | -1.781 | 278 | 0.0005843 |
| EPITHELIAL_MESENCHYMAL_TRANSITION | 1.193e-05 | 7.728e-05 | -0.5189 | -1.8 | 238 | 0.0005963 |
| MTORC1_SIGNALING | 1.198e-05 | 7.728e-05 | -0.5444 | -1.885 | 232 | 0.0005988 |
| GLYCOLYSIS | 1.199e-05 | 7.728e-05 | -0.5276 | -1.826 | 230 | 0.0005994 |
| ESTROGEN_RESPONSE_LATE | 1.202e-05 | 7.728e-05 | -0.5632 | -1.946 | 226 | 0.0006008 |
| OXIDATIVE_PHOSPHORYLATION | 1.206e-05 | 7.728e-05 | -0.5307 | -1.829 | 220 | 0.0006029 |
| G2M_CHECKPOINT | 1.211e-05 | 7.728e-05 | -0.6525 | -2.242 | 212 | 0.0006057 |
| E2F_TARGETS | 1.236e-05 | 7.728e-05 | -0.6436 | -2.181 | 185 | 0.0006182 |
| XENOBIOTIC_METABOLISM | 2.334e-05 | 0.0001224 | -0.4913 | -1.728 | 280 | 0.001167 |
| FATTY_ACID_METABOLISM | 2.449e-05 | 0.0001224 | -0.5435 | -1.855 | 198 | 0.001224 |
| CHOLESTEROL_HOMEOSTASIS | 2.744e-05 | 0.0001247 | -0.6212 | -1.924 | 86 | 0.001372 |
| ESTROGEN_RESPONSE_EARLY | 0.0001084 | 0.0004516 | -0.4879 | -1.683 | 222 | 0.005419 |
| COAGULATION | 0.0002037 | 0.0007835 | -0.5245 | -1.742 | 151 | 0.01019 |
| MYC_TARGETS_V1 | 0.0005558 | 0.001985 | -0.4612 | -1.588 | 218 | 0.02779 |
| COMPLEMENT | 0.0008723 | 0.002908 | -0.4488 | -1.555 | 235 | 0.04361 |
# Make a table plot of significant Hallmark pathways
if (interactive()) grid::grid.newpage()
plotGseaTable(
hallmark[dplyr::filter(fgseaHallmark, padj < 0.05)$pathway], ranks, fgseaHallmark, gseaParam = 0.5
)
# Set seed to enable reproducibility
set.seed(22)
# Run GSEA for KEGG
fgseaKEGG <- fgsea(kegg, ranks, nperm=1e5) %>%
as_tibble() %>%
dplyr::rename(FDR = padj) %>%
mutate(padj = p.adjust(pval, "bonferroni")) %>%
dplyr::arrange(pval)
# Create an object of pathways with adjusted p-value < 0.05 for construction of network diagrams. This should be done differently next time, but too much work has been done to change it now.
fgseaKEGGTop <- fgseaKEGG %>%
dplyr::filter(padj < 0.05)
fgseaKEGGTop %>%
dplyr::select(-leadingEdge, -nMoreExtreme) %>%
pander(
style = "rmarkdown",
split.tables = Inf,
justify = "lrrrrrr",
caption = paste(
"The", nrow(.), "most significantly enriched KEGG pathways.",
"This corresponds to an FDR of", percent(max(.$FDR)))
)
| pathway | pval | FDR | ES | NES | size | padj |
|---|---|---|---|---|---|---|
| METABOLISM_OF_XENOBIOTICS_BY_CYTOCHROME_P450 | 1.102e-05 | 0.0001888 | -0.6019 | -2.181 | 438 | 0.00205 |
| DRUG_METABOLISM_CYTOCHROME_P450 | 1.104e-05 | 0.0001888 | -0.5839 | -2.114 | 431 | 0.002054 |
| RETINOL_METABOLISM | 1.105e-05 | 0.0001888 | -0.517 | -1.871 | 429 | 0.002055 |
| DRUG_METABOLISM_OTHER_ENZYMES | 1.139e-05 | 0.0001888 | -0.544 | -1.938 | 333 | 0.002119 |
| STEROID_HORMONE_BIOSYNTHESIS | 1.146e-05 | 0.0001888 | -0.5685 | -2.02 | 321 | 0.002131 |
| STARCH_AND_SUCROSE_METABOLISM | 1.163e-05 | 0.0001888 | -0.6105 | -2.15 | 285 | 0.002164 |
| PORPHYRIN_AND_CHLOROPHYLL_METABOLISM | 1.164e-05 | 0.0001888 | -0.5904 | -2.079 | 284 | 0.002165 |
| PENTOSE_AND_GLUCURONATE_INTERCONVERSIONS | 1.173e-05 | 0.0001888 | -0.6101 | -2.139 | 269 | 0.002182 |
| ASCORBATE_AND_ALDARATE_METABOLISM | 1.175e-05 | 0.0001888 | -0.6125 | -2.146 | 267 | 0.002185 |
| GLYCOLYSIS_GLUCONEOGENESIS | 1.345e-05 | 0.0001888 | -0.6896 | -2.178 | 99 | 0.002502 |
| ECM_RECEPTOR_INTERACTION | 1.36e-05 | 0.0001888 | -0.6702 | -2.093 | 91 | 0.002529 |
| FATTY_ACID_METABOLISM | 1.402e-05 | 0.0001888 | -0.7896 | -2.384 | 71 | 0.002607 |
| TYROSINE_METABOLISM | 1.402e-05 | 0.0001888 | -0.695 | -2.098 | 71 | 0.002607 |
| GLUTATHIONE_METABOLISM | 1.421e-05 | 0.0001888 | -0.8061 | -2.396 | 64 | 0.002643 |
| BETA_ALANINE_METABOLISM | 1.555e-05 | 0.0001928 | -0.8038 | -2.068 | 28 | 0.002892 |
| BUTANOATE_METABOLISM | 2.958e-05 | 0.0003439 | -0.7338 | -2.061 | 45 | 0.005502 |
| CELL_CYCLE | 3.942e-05 | 0.0004313 | -0.5798 | -1.87 | 116 | 0.007333 |
| NITROGEN_METABOLISM | 4.558e-05 | 0.000471 | -0.7684 | -2.061 | 35 | 0.008478 |
| PYRIMIDINE_METABOLISM | 9.477e-05 | 0.0009277 | -0.5889 | -1.85 | 95 | 0.01763 |
| SULFUR_METABOLISM | 0.000135 | 0.001255 | 0.5906 | 1.978 | 64 | 0.0251 |
| PROXIMAL_TUBULE_BICARBONATE_RECLAMATION | 0.0001476 | 0.001307 | -0.6879 | -1.939 | 46 | 0.02745 |
| OXIDATIVE_PHOSPHORYLATION | 0.0001551 | 0.001311 | -0.5359 | -1.753 | 131 | 0.02885 |
| DNA_REPLICATION | 0.0002314 | 0.001822 | -0.7475 | -1.948 | 30 | 0.04303 |
| HEMATOPOIETIC_CELL_LINEAGE | 0.000235 | 0.001822 | -0.5375 | -1.743 | 122 | 0.04372 |
pv.out <- pathview(gene.data = ranks,
pathway.id = "00010",
species = "Danio rerio",
gene.idtype = "ENSEMBL",
limit = list(gene=5, cpd=1))
## [1] "Note: 1192 of 19396 unique input IDs unmapped."
# Make a table plot of significant KEGG pathways
if (interactive()) grid::grid.newpage()
plotGseaTable(
kegg[fgseaKEGGTop$pathway], ranks, fgseaKEGG, gseaParam = 0.5
)
# Set seed to enable reproducibility
set.seed(22)
# Run GSEA for WikiPathways
fgseaWiki <- fgsea(wiki, ranks, nperm=1e5) %>%
as_tibble() %>%
dplyr::rename(FDR = padj) %>%
mutate(padj = p.adjust(pval, "bonferroni")) %>%
dplyr::arrange(pval)
# Create an object of pathways with adjusted p-value < 0.05 for construction of network diagrams. This should be done differently next time, but too much work has been done to change it now.
fgseaWikiTop <- fgseaWiki %>%
dplyr::filter(padj < 0.05)
fgseaWikiTop %>%
dplyr::select(-leadingEdge, -nMoreExtreme) %>%
pander(
style = "rmarkdown",
split.tables = Inf,
justify = "lrrrrrr",
caption = paste(
"The", nrow(.), "most significantly enriched Wiki pathways.",
"This corresponds to an FDR of", percent(max(.$FDR)))
)
| pathway | pval | FDR | ES | NES | size | padj |
|---|---|---|---|---|---|---|
| Metapathway biotransformation Phase I and II | 1.066e-05 | 0.001206 | -0.4543 | -1.672 | 585 | 0.005595 |
| Nuclear Receptors Meta-Pathway | 1.07e-05 | 0.001206 | -0.4985 | -1.831 | 564 | 0.005619 |
| NRF2 pathway | 1.148e-05 | 0.001206 | -0.6226 | -2.209 | 317 | 0.006028 |
| Glucuronidation | 1.169e-05 | 0.001206 | -0.6133 | -2.154 | 278 | 0.006136 |
| Amino Acid metabolism | 1.327e-05 | 0.001206 | -0.6151 | -1.964 | 109 | 0.006969 |
| Retinoblastoma Gene in Cancer | 1.379e-05 | 0.001206 | -0.6882 | -2.117 | 82 | 0.007238 |
| Cell Cycle | 3.971e-05 | 0.002962 | -0.5792 | -1.856 | 112 | 0.02085 |
| Glutathione metabolism | 4.514e-05 | 0.002962 | -0.7314 | -1.99 | 38 | 0.0237 |
| G1 to S cell cycle control | 5.727e-05 | 0.003341 | -0.6812 | -2.001 | 60 | 0.03007 |
| Glycolysis and Gluconeogenesis | 8.548e-05 | 0.004488 | -0.6523 | -1.926 | 62 | 0.04488 |
# Make a table plot of significant WikiPathways pathways
if (interactive()) grid::grid.newpage()
plotGseaTable(
wiki[fgseaWikiTop$pathway], ranks, fgseaWiki, gseaParam = 0.5
)
Expression of fthl31 across all samples.
Expression of tfa across all samples.
Expression of tfr1b across all samples.
ireGR <- list.files(pattern = "gff.gz") %>%
sapply(rtracklayer::import.gff, simplify = FALSE) %>%
lapply(function(x){
tibble(
tx_id = seqnames(x) %>% as.character(),
quality = x$quality
) %>%
left_join(
mcols(transGR) %>% as.data.frame()
) %>%
mutate(
quality = factor(
quality, levels = c("Low", "Medium", "High")
)
) %>%
arrange(gene_id, desc(quality)) %>%
distinct(gene_id, .keep_all = TRUE) %>%
dplyr::select(tx_id, gene_id, quality)
})
names(ireGR) <- str_extract(names(ireGR), "utr[35]")
ireHigh <- lapply(ireGR, subset, quality == "High")
ireByGene <- c(
names(ireGR) %>%
lapply(function(x){
mutate(ireGR[[x]], Type = paste0(x, "_All"))
}),
names(ireHigh) %>%
lapply(function(x){
mutate(ireHigh[[x]], Type = x)
})
) %>%
bind_rows() %>%
split(f = .$gene_id) %>%
lapply(function(x){
unique(x$Type)
})
ireGoseq <- pwf %>%
goseq(gene2cat = ireByGene) %>%
as_tibble %>%
mutate(FDR = p.adjust(over_represented_pvalue, method = "fdr"))
| Pathway | numDEInCat | numInCat | PValue | FDR |
|---|---|---|---|---|
| utr5_All | 6 | 306 | 0.1834 | 0.7334 |
| utr5 | 1 | 39 | 0.409 | 0.8179 |
| utr3 | 1 | 72 | 0.6134 | 0.8179 |
| utr3_All | 9 | 883 | 0.9468 | 0.9468 |
ireGSEA <- c(
names(ireGR) %>%
lapply(function(x){
mutate(ireGR[[x]], Type = paste0(x, "_All"))
}),
names(ireHigh) %>%
lapply(function(x){
mutate(ireHigh[[x]], Type = x)
})
) %>%
bind_rows() %>%
split(f = .$Type) %>%
lapply(function(x){
unique(x$gene_id)
})
fgseaIRE <- fgsea(ireGSEA, ranks, nperm=1e5) %>%
as_tibble() %>%
dplyr::rename(FDR = padj) %>%
mutate(padj = p.adjust(pval, "bonferroni")) %>%
dplyr::arrange(pval)
fgseaIRE %>%
dplyr::select(-leadingEdge, -nMoreExtreme) %>%
pander(
style = "rmarkdown",
split.tables = Inf,
justify = "lrrrrrr",
caption = paste(
"The", nrow(.), "GSEA analysis of IRE enrichment", percent(max(.$FDR)))
)
| pathway | pval | FDR | ES | NES | size | padj |
|---|---|---|---|---|---|---|
| utr3 | 0.1586 | 0.5451 | -0.4057 | -1.224 | 72 | 0.6346 |
| utr5 | 0.2726 | 0.5451 | 0.3681 | 1.12 | 39 | 1 |
| utr3_All | 0.6418 | 0.6589 | -0.2567 | -0.9589 | 883 | 1 |
| utr5_All | 0.6589 | 0.6589 | -0.2621 | -0.9276 | 306 | 1 |
pander(sessionInfo())
R version 3.6.0 (2019-04-26)
Platform: x86_64-apple-darwin15.6.0 (64-bit)
locale: en_AU.UTF-8||en_AU.UTF-8||en_AU.UTF-8||C||en_AU.UTF-8||en_AU.UTF-8
attached base packages: splines, stats4, parallel, stats, graphics, grDevices, utils, datasets, methods and base
other attached packages: pathview(v.1.24.0), org.Dr.eg.db(v.3.8.2), goseq(v.1.36.0), geneLenDataBase(v.1.20.0), BiasedUrn(v.1.07), kableExtra(v.1.1.0), cqn(v.1.30.0), quantreg(v.5.51), SparseM(v.1.77), preprocessCore(v.1.46.0), nor1mix(v.1.3-0), mclust(v.5.4.5), RUVSeq(v.1.18.0), EDASeq(v.2.18.0), ShortRead(v.1.42.0), GenomicAlignments(v.1.20.1), SummarizedExperiment(v.1.14.1), DelayedArray(v.0.10.0), matrixStats(v.0.54.0), Rsamtools(v.2.0.0), GenomicRanges(v.1.36.0), GenomeInfoDb(v.1.20.0), Biostrings(v.2.52.0), XVector(v.0.24.0), BiocParallel(v.1.18.1), pheatmap(v.1.0.12), fgsea(v.1.10.1), Rcpp(v.1.0.2), tidygraph(v.1.1.2), ggraph(v.1.0.2), plyr(v.1.8.4), org.Hs.eg.db(v.3.8.2), AnnotationDbi(v.1.46.1), IRanges(v.2.18.2), S4Vectors(v.0.22.0), variancePartition(v.1.14.0), Biobase(v.2.44.0), foreach(v.1.4.7), here(v.0.1), scales(v.1.0.0), qvalue(v.2.16.0), swfdr(v.1.10.0), ggrepel(v.0.8.1), RColorBrewer(v.1.1-2), pander(v.0.6.3), magrittr(v.1.5), forcats(v.0.4.0), stringr(v.1.4.0), dplyr(v.0.8.3), purrr(v.0.3.2), readr(v.1.3.1), tidyr(v.0.8.3), tibble(v.2.1.3), ggplot2(v.3.2.1), tidyverse(v.1.2.1), AnnotationHub(v.2.16.0), BiocFileCache(v.1.8.0), dbplyr(v.1.4.2), BiocGenerics(v.0.30.0), edgeR(v.3.26.7), biomaRt(v.2.40.4) and limma(v.3.40.6)
loaded via a namespace (and not attached): utf8(v.1.1.4), R.utils(v.2.9.0), tidyselect(v.0.2.5), lme4(v.1.1-21), RSQLite(v.2.1.2), grid(v.3.6.0), DESeq(v.1.36.0), munsell(v.0.5.0), codetools(v.0.2-16), withr(v.2.1.2), colorspace(v.1.4-1), highr(v.0.8), knitr(v.1.24), rstudioapi(v.0.10), labeling(v.0.3), KEGGgraph(v.1.44.0), GenomeInfoDbData(v.1.2.1), hwriter(v.1.3.2), polyclip(v.1.10-0), bit64(v.0.9-7), farver(v.1.1.0), rprojroot(v.1.3-2), vctrs(v.0.2.0), generics(v.0.0.2), xfun(v.0.9), R6(v.2.4.0), doParallel(v.1.0.15), locfit(v.1.5-9.1), bitops(v.1.0-6), assertthat(v.0.2.1), promises(v.1.0.1), gtable(v.0.3.0), rlang(v.0.4.0), MatrixModels(v.0.4-1), zeallot(v.0.1.0), genefilter(v.1.66.0), rtracklayer(v.1.44.3), lazyeval(v.0.2.2), broom(v.0.5.2), BiocManager(v.1.30.4), yaml(v.2.2.0), reshape2(v.1.4.3), modelr(v.0.1.5), GenomicFeatures(v.1.36.4), backports(v.1.1.4), httpuv(v.1.5.1), tools(v.3.6.0), gplots(v.3.0.1.1), progress(v.1.2.2), zlibbioc(v.1.30.0), RCurl(v.1.95-4.12), prettyunits(v.1.0.2), viridis(v.0.5.1), haven(v.2.1.1), colorRamps(v.2.3), data.table(v.1.12.2), aroma.light(v.3.14.0), hms(v.0.5.1), mime(v.0.7), evaluate(v.0.14), xtable(v.1.8-4), pbkrtest(v.0.4-7), XML(v.3.98-1.20), readxl(v.1.3.1), gridExtra(v.2.3), compiler(v.3.6.0), KernSmooth(v.2.23-15), crayon(v.1.3.4), minqa(v.1.2.4), R.oo(v.1.22.0), htmltools(v.0.3.6), mgcv(v.1.8-28), later(v.0.8.0), geneplotter(v.1.62.0), lubridate(v.1.7.4), DBI(v.1.0.0), tweenr(v.1.0.1), MASS(v.7.3-51.4), rappdirs(v.0.3.1), boot(v.1.3-23), Matrix(v.1.2-17), cli(v.1.1.0), R.methodsS3(v.1.7.1), gdata(v.2.18.0), igraph(v.1.2.4.1), pkgconfig(v.2.0.2), xml2(v.1.2.2), annotate(v.1.62.0), webshot(v.0.5.1), rvest(v.0.3.4), digest(v.0.6.20), graph(v.1.62.0), rmarkdown(v.1.15), cellranger(v.1.1.0), fastmatch(v.1.1-0), curl(v.4.0), shiny(v.1.3.2), gtools(v.3.8.1), nloptr(v.1.2.1), nlme(v.3.1-141), jsonlite(v.1.6), fansi(v.0.4.0), viridisLite(v.0.3.0), pillar(v.1.4.2), lattice(v.0.20-38), KEGGREST(v.1.24.1), GO.db(v.3.8.2), httr(v.1.4.1), survival(v.2.44-1.1), interactiveDisplayBase(v.1.22.0), glue(v.1.3.1), png(v.0.1-7), iterators(v.1.0.12), Rgraphviz(v.2.28.0), bit(v.1.1-14), ggforce(v.0.3.1), stringi(v.1.4.3), blob(v.1.2.0), latticeExtra(v.0.6-28), caTools(v.1.17.1.2) and memoise(v.1.1.0)